1/3/2021

R Subset

This dataset comes from the Kaggle website (https://www.kaggle.com/roshansharma/sanfranciso-crime-dataset), and I will use it to develop a data product as my final project. We subset the dataset for three different graphs: crime type (df2), crime in each district (df3), and most dangerous streets (df4). To keep the graphs readable, we only keep the crime categories with more than 100 cases (n > 100) and the streets with more than 200 cases (n > 200).

# Assemble the incident columns into one data frame and drop every row that
# has a missing value in any of them.
PoliceData <- data.frame(
  latitude, longitud, category, incidntNum,
  districts, dayoftheweek, resolution, street
)
FinalData <- PoliceData[complete.cases(PoliceData), ]

# Keep a single row per incident number, retaining all other columns.
df <- distinct(FinalData, incidntNum, .keep_all = TRUE)

# Crime type: number of cases per category, most frequent first, restricted to
# categories with more than 100 cases so the bar chart stays readable.
df2 <- count(df, category, sort = TRUE)
df2 <- subset(df2, n > 100)

# Districts: discard rows whose district is the empty string, then count the
# cases per district, most frequent first.
dfdistrict <- subset(df, districts != "")
df3 <- count(dfdistrict, districts, sort = TRUE)

# Streets: number of cases per street (left unsorted), then keep only the
# streets with more than 200 cases.
# FALSE is spelled out because the alias `F` is an ordinary, reassignable
# variable.
df4 <- count(df, street, sort = FALSE)
dfsub <- subset(df4, n > 200)

Leaflet Map

# Draw at most the first 2000 incidents as circle markers on a tile map.
# head() replaces df[1:2000, ] so that a data frame with fewer than 2000 rows
# is not padded with all-NA rows (which would hand missing coordinates to
# addCircleMarkers).
head(df, 2000) %>%
  leaflet() %>%
  addTiles() %>%
  addCircleMarkers(
    ~longitud, ~latitude,
    radius = 3, color = "black", fillColor = "blue",
    stroke = TRUE, fillOpacity = 0.8, group = "Blue"
  )

Crime Type

# Bar chart of the number of cases per crime category. Bars are ordered from
# the most to the least frequent category, and the x tick labels are rotated
# by 90 degrees.
p1 <- ggplot(data = df2, mapping = aes(x = reorder(category, -n), y = n)) +
  geom_bar(stat = "identity") +
  labs(title = "Crime Histogram", x = "Crime type", y = "Number of Cases") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

# Convert the static plot to a plotly widget and display it.
fig1 <- ggplotly(p1)
fig1

Amount of crime in each district

# Bar chart of the number of cases per district, drawn with blue bars ordered
# from the most to the least affected district; x tick labels are rotated by
# 90 degrees.
p2 <- ggplot(data = df3, mapping = aes(x = reorder(districts, -n), y = n)) +
  geom_bar(stat = "identity", fill = "blue") +
  labs(title = "Crime Histogram", x = "District", y = "Number of Cases") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

# Convert the static plot to a plotly widget and display it.
fig2 <- ggplotly(p2)
fig2

%## Most Dangerous streets

%```{r leaflet3, echo=TRUE, warning=FALSE,fig.height=5, fig.width=6}
%p4<-ggplot(dfsub, aes(x = reorder(street, -n), y = n)) +
%  geom_bar(stat="identity",fill ='red')+labs(title="Crime Histogram") +
%  labs(x="Crime type",y="Number of Cases")+
%  theme(axis.text.x=element_text(angle=90,hjust=1,vjust=0.5))
%(fig4<- ggplotly(p4))

%```